Reading & Saving the Data

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
years <- 2017:2020
quarters <- 1:4
types <- c("Electric", "Gas")
months <- 1:42
pge_elec_gas <- NULL

for (quarter in quarters){
  for (year in years){
    for (type in types){
       if ((quarter %in% 3:4) & (year %in% 2020)){
        next 
       }
      
      filename <- 
        paste0(
          "PGE_",
          year, 
          "_Q", 
          quarter,
          "_",
          type,
          "UsageByZip.csv"
        )
     
      print(filename)
      
      
      temp <- read_csv(filename)
      
      if(type %in% "Gas"){
        print(temp)
        temp <- temp %>% 
          mutate(TOTALKBTU = 100 * TOTALTHM) %>% 
          select(-TOTALTHM, -AVERAGETHM)
      } else {
        temp <- temp %>% 
        mutate(TOTALKBTU = 3.412 * TOTALKWH) %>% 
          select(-TOTALKWH, -AVERAGEKWH)
      }
      
      pge_elec_gas <- rbind(pge_elec_gas, temp)

    }
  }
}
## [1] "PGE_2017_Q1_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2017_Q1_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,486 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     1  2017 Gas- Commerc~ Y                     0        0         NA
##  2   92304     2  2017 Gas- Commerc~ Y                     0        0         NA
##  3   92304     3  2017 Gas- Commerc~ Y                     0        0         NA
##  4   92365     1  2017 Gas- Commerc~ Y                     0        0         NA
##  5   92365     2  2017 Gas- Commerc~ Y                     0        0         NA
##  6   92365     3  2017 Gas- Commerc~ Y                     0        0         NA
##  7   93203     1  2017 Gas- Commerc~ Y                     0        0         NA
##  8   93203     2  2017 Gas- Commerc~ Y                     0        0         NA
##  9   93203     3  2017 Gas- Commerc~ Y                     0        0         NA
## 10   93204     1  2017 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,476 more rows
## [1] "PGE_2018_Q1_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2018_Q1_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,499 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1      NA    NA    NA Gas-          <NA>                 NA       NA         NA
##  2   92304     1  2018 Gas- Commerc~ Y                     0        0         NA
##  3   92304     2  2018 Gas- Commerc~ Y                     0        0         NA
##  4   92304     3  2018 Gas- Commerc~ Y                     0        0         NA
##  5   92365     1  2018 Gas- Commerc~ Y                     0        0         NA
##  6   92365     2  2018 Gas- Commerc~ Y                     0        0         NA
##  7   92365     3  2018 Gas- Commerc~ Y                     0        0         NA
##  8   93203     1  2018 Gas- Commerc~ Y                     0        0         NA
##  9   93203     2  2018 Gas- Commerc~ Y                     0        0         NA
## 10   93203     3  2018 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,489 more rows
## [1] "PGE_2019_Q1_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2019_Q1_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,499 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     1  2019 Gas- Commerc~ Y                     0        0         NA
##  2   92304     2  2019 Gas- Commerc~ Y                     0        0         NA
##  3   92304     3  2019 Gas- Commerc~ Y                     0        0         NA
##  4   92365     1  2019 Gas- Commerc~ Y                     0        0         NA
##  5   92365     2  2019 Gas- Commerc~ Y                     0        0         NA
##  6   92365     3  2019 Gas- Commerc~ Y                     0        0         NA
##  7   93203     1  2019 Gas- Commerc~ Y                     0        0         NA
##  8   93203     2  2019 Gas- Commerc~ Y                     0        0         NA
##  9   93203     3  2019 Gas- Commerc~ Y                     0        0         NA
## 10   93204     1  2019 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,489 more rows
## [1] "PGE_2020_Q1_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2020_Q1_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,493 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     1  2020 Gas- Commerc~ Y                     0        0         NA
##  2   92304     2  2020 Gas- Commerc~ Y                     0        0         NA
##  3   92304     3  2020 Gas- Commerc~ Y                     0        0         NA
##  4   92365     1  2020 Gas- Commerc~ Y                     0        0         NA
##  5   92365     2  2020 Gas- Commerc~ Y                     0        0         NA
##  6   92365     3  2020 Gas- Commerc~ Y                     0        0         NA
##  7   93203     1  2020 Gas- Commerc~ Y                     0        0         NA
##  8   93203     2  2020 Gas- Commerc~ Y                     0        0         NA
##  9   93203     3  2020 Gas- Commerc~ Y                     0        0         NA
## 10   93204     1  2020 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,483 more rows
## [1] "PGE_2017_Q2_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2017_Q2_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,489 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     4  2017 Gas- Commerc~ Y                     0        0         NA
##  2   92304     5  2017 Gas- Commerc~ Y                     0        0         NA
##  3   92304     6  2017 Gas- Commerc~ Y                     0        0         NA
##  4   92365     4  2017 Gas- Commerc~ Y                     0        0         NA
##  5   92365     5  2017 Gas- Commerc~ Y                     0        0         NA
##  6   92365     6  2017 Gas- Commerc~ Y                     0        0         NA
##  7   93203     4  2017 Gas- Commerc~ Y                     0        0         NA
##  8   93203     5  2017 Gas- Commerc~ Y                     0        0         NA
##  9   93203     6  2017 Gas- Commerc~ Y                     0        0         NA
## 10   93204     4  2017 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,479 more rows
## [1] "PGE_2018_Q2_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2018_Q2_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,502 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1      NA    NA    NA Gas-          <NA>                 NA       NA         NA
##  2   92304     4  2018 Gas- Commerc~ Y                     0        0         NA
##  3   92304     5  2018 Gas- Commerc~ Y                     0        0         NA
##  4   92304     6  2018 Gas- Commerc~ Y                     0        0         NA
##  5   92365     4  2018 Gas- Commerc~ Y                     0        0         NA
##  6   92365     5  2018 Gas- Commerc~ Y                     0        0         NA
##  7   92365     6  2018 Gas- Commerc~ Y                     0        0         NA
##  8   93203     4  2018 Gas- Commerc~ Y                     0        0         NA
##  9   93203     5  2018 Gas- Commerc~ Y                     0        0         NA
## 10   93203     6  2018 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,492 more rows
## [1] "PGE_2019_Q2_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2019_Q2_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,499 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1      NA    NA    NA Gas-          <NA>                 NA       NA         NA
##  2   92304     4  2019 Gas- Commerc~ Y                     0        0         NA
##  3   92304     5  2019 Gas- Commerc~ Y                     0        0         NA
##  4   92304     6  2019 Gas- Commerc~ Y                     0        0         NA
##  5   92365     4  2019 Gas- Commerc~ Y                     0        0         NA
##  6   92365     5  2019 Gas- Commerc~ Y                     0        0         NA
##  7   92365     6  2019 Gas- Commerc~ Y                     0        0         NA
##  8   93203     4  2019 Gas- Commerc~ Y                     0        0         NA
##  9   93203     5  2019 Gas- Commerc~ Y                     0        0         NA
## 10   93203     6  2019 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,489 more rows
## [1] "PGE_2020_Q2_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2020_Q2_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,461 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     4  2020 Gas- Commerc~ Y                     0        0         NA
##  2   92304     5  2020 Gas- Commerc~ Y                     0        0         NA
##  3   92304     6  2020 Gas- Commerc~ Y                     0        0         NA
##  4   92365     4  2020 Gas- Commerc~ Y                     0        0         NA
##  5   92365     5  2020 Gas- Commerc~ Y                     0        0         NA
##  6   92365     6  2020 Gas- Commerc~ Y                     0        0         NA
##  7   93203     4  2020 Gas- Commerc~ Y                     0        0         NA
##  8   93203     5  2020 Gas- Commerc~ Y                     0        0         NA
##  9   93203     6  2020 Gas- Commerc~ Y                     0        0         NA
## 10   93204     4  2020 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,451 more rows
## [1] "PGE_2017_Q3_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2017_Q3_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_double()
## )
## # A tibble: 3,491 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     7  2017 Gas- Commerc~ Y                     0        0         NA
##  2   92304     8  2017 Gas- Commerc~ Y                     0        0         NA
##  3   92304     9  2017 Gas- Commerc~ Y                     0        0         NA
##  4   92365     7  2017 Gas- Commerc~ Y                     0        0         NA
##  5   92365     8  2017 Gas- Commerc~ Y                     0        0         NA
##  6   92365     9  2017 Gas- Commerc~ Y                     0        0         NA
##  7   93203     7  2017 Gas- Commerc~ Y                     0        0         NA
##  8   93203     8  2017 Gas- Commerc~ Y                     0        0         NA
##  9   93203     9  2017 Gas- Commerc~ Y                     0        0         NA
## 10   93204     7  2017 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,481 more rows
## [1] "PGE_2018_Q3_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2018_Q3_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_double()
## )
## # A tibble: 3,503 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     7  2018 Gas- Commerc~ Y                     0        0         NA
##  2   92304     8  2018 Gas- Commerc~ Y                     0        0         NA
##  3   92304     9  2018 Gas- Commerc~ Y                     0        0         NA
##  4   92365     7  2018 Gas- Commerc~ Y                     0        0         NA
##  5   92365     8  2018 Gas- Commerc~ Y                     0        0         NA
##  6   92365     9  2018 Gas- Commerc~ Y                     0        0         NA
##  7   93203     7  2018 Gas- Commerc~ Y                     0        0         NA
##  8   93203     8  2018 Gas- Commerc~ Y                     0        0         NA
##  9   93203     9  2018 Gas- Commerc~ Y                     0        0         NA
## 10   93204     7  2018 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,493 more rows
## [1] "PGE_2019_Q3_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2019_Q3_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,489 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     7  2019 Gas- Commerc~ Y                     0        0         NA
##  2   92304     8  2019 Gas- Commerc~ Y                     0        0         NA
##  3   92304     9  2019 Gas- Commerc~ Y                     0        0         NA
##  4   92365     7  2019 Gas- Commerc~ Y                     0        0         NA
##  5   92365     8  2019 Gas- Commerc~ Y                     0        0         NA
##  6   92365     9  2019 Gas- Commerc~ Y                     0        0         NA
##  7   93203     7  2019 Gas- Commerc~ Y                     0        0         NA
##  8   93203     8  2019 Gas- Commerc~ Y                     0        0         NA
##  9   93203     9  2019 Gas- Commerc~ Y                     0        0         NA
## 10   93204     7  2019 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,479 more rows
## [1] "PGE_2017_Q4_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2017_Q4_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 4,662 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304     9  2017 Gas- Commerc~ Y                     0        0         NA
##  2   92304    10  2017 Gas- Commerc~ Y                     0        0         NA
##  3   92304    11  2017 Gas- Commerc~ Y                     0        0         NA
##  4   92304    12  2017 Gas- Commerc~ Y                     0        0         NA
##  5   92365     9  2017 Gas- Commerc~ Y                     0        0         NA
##  6   92365    10  2017 Gas- Commerc~ Y                     0        0         NA
##  7   92365    11  2017 Gas- Commerc~ Y                     0        0         NA
##  8   92365    12  2017 Gas- Commerc~ Y                     0        0         NA
##  9   93203     9  2017 Gas- Commerc~ Y                     0        0         NA
## 10   93203    10  2017 Gas- Commerc~ Y                     0        0         NA
## # ... with 4,652 more rows
## [1] "PGE_2018_Q4_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2018_Q4_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,496 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1      NA    NA    NA Gas-          <NA>                 NA       NA         NA
##  2   92304    10  2018 Gas- Commerc~ Y                     0        0         NA
##  3   92304    11  2018 Gas- Commerc~ Y                     0        0         NA
##  4   92304    12  2018 Gas- Commerc~ Y                     0        0         NA
##  5   92365    10  2018 Gas- Commerc~ Y                     0        0         NA
##  6   92365    11  2018 Gas- Commerc~ Y                     0        0         NA
##  7   92365    12  2018 Gas- Commerc~ Y                     0        0         NA
##  8   93203    10  2018 Gas- Commerc~ Y                     0        0         NA
##  9   93203    11  2018 Gas- Commerc~ Y                     0        0         NA
## 10   93203    12  2018 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,486 more rows
## [1] "PGE_2019_Q4_ElectricUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## [1] "PGE_2019_Q4_GasUsageByZip.csv"
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## # A tibble: 3,490 x 8
##    ZIPCODE MONTH  YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS TOTALTHM AVERAGETHM
##      <dbl> <dbl> <dbl> <chr>         <chr>             <dbl>    <dbl>      <dbl>
##  1   92304    10  2019 Gas- Commerc~ Y                     0        0         NA
##  2   92304    11  2019 Gas- Commerc~ Y                     0        0         NA
##  3   92304    12  2019 Gas- Commerc~ Y                     0        0         NA
##  4   92365    10  2019 Gas- Commerc~ Y                     0        0         NA
##  5   92365    11  2019 Gas- Commerc~ Y                     0        0         NA
##  6   92365    12  2019 Gas- Commerc~ Y                     0        0         NA
##  7   93203    10  2019 Gas- Commerc~ Y                     0        0         NA
##  8   93203    11  2019 Gas- Commerc~ Y                     0        0         NA
##  9   93203    12  2019 Gas- Commerc~ Y                     0        0         NA
## 10   93204    10  2019 Gas- Commerc~ Y                     0        0         NA
## # ... with 3,480 more rows

Filtering to Zip Codes in the Bay Area

library(tidyverse)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(leaflet)
library (dplyr)


ca_counties <- counties("CA", cb = T, progress_bar = F)

st_crs(ca_counties)
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
projection <- "+proj=utm +zone=10 +ellps=GRS80 +datum=NAD83 +units=ft +no_defs"

ca_counties_transformed <- 
  ca_counties %>% 
  st_transform(4326) %>% 
  st_transform(26910) %>% 
  st_transform(projection) %>% 
  st_transform(st_crs(ca_counties))

usa_zips <- 
  zctas(cb = T, progress_bar = F)
## ZCTAs can take several minutes to download.  To cache the data and avoid re-downloading in future R sessions, set `options(tigris_use_cache = TRUE)`
bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )

bay_cbgs <- 
  bay_county_names %>% 
  map_dfr(function(county) {
    block_groups("CA", county, cb = T, progress_bar = F)
  })

bay_counties <-
  ca_counties %>%
  filter(NAME %in% bay_county_names)


bay_zips <-
  usa_zips %>% 
  st_centroid() %>% 
  .[bay_counties, ] %>% 
  st_set_geometry(NULL) %>% 
  left_join(usa_zips %>% select(GEOID10)) %>% 
  st_as_sf()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## Joining, by = "GEOID10"
colnames(pge_elec_gas)
## [1] "ZIPCODE"        "MONTH"          "YEAR"           "CUSTOMERCLASS" 
## [5] "COMBINED"       "TOTALCUSTOMERS" "TOTALKBTU"
pge_rc_bayzips <-
  pge_elec_gas %>% 
  filter(CUSTOMERCLASS %in% c("Gas- Residential", "Elec- Residential", "Gas- Commercial", "Elec- Commercial")) %>% 
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>% 
  group_by(ZIPCODE, CUSTOMERCLASS) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>%  
  ungroup() %>% 
  group_by(CUSTOMERCLASS,MONTH,YEAR) %>% 
  summarize(
    TOTALKBTU = sum(TOTALKBTU, na.rm = T)
  ) 
## `summarise()` regrouping output by 'CUSTOMERCLASS', 'MONTH' (override with `.groups` argument)
class(pge_rc_bayzips$ZIPCODE)
## Warning: Unknown or uninitialised column: `ZIPCODE`.
## [1] "NULL"

Creating New Date Column

library(date)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dplyr)


pge_rc_bayzips$Date <- as.yearmon(paste(pge_rc_bayzips$YEAR, pge_rc_bayzips$MONTH), "%Y %m")

pge_bayzips_date <- pge_rc_bayzips %>% 
  mutate(
    DATE= case_when(
  Date == "Jan 2017" ~ 1,
  Date == "Feb 2017" ~ 2,
  Date == "Mar 2017" ~ 3,
  Date == "Apr 2017" ~ 4,
  Date == "May 2017" ~ 5,
  Date == "Jun 2017" ~ 6,
  Date == "Jul 2017" ~ 7,
  Date == "Aug 2017" ~ 8,
  Date == "Sep 2017" ~ 9,
  Date == "Oct 2017" ~ 10,
  Date == "Nov 2017" ~ 11,
  Date == "Dec 2017" ~ 12,
  Date == "Jan 2018" ~ 13,
  Date == "Feb 2018" ~ 14,
  Date == "Mar 2018" ~ 15,
  Date == "Apr 2018" ~ 16,
  Date == "May 2018" ~ 17,
  Date == "Jun 2018" ~ 18,
  Date == "Jul 2018" ~ 19,
  Date == "Aug 2018" ~ 20,
  Date == "Sep 2018" ~ 21,
  Date == "Oct 2018" ~ 22,
  Date == "Nov 2018" ~ 23,
  Date == "Dec 2018" ~ 24,
  Date == "Jan 2019" ~ 25,
  Date == "Feb 2019" ~ 26,
  Date == "Mar 2019" ~ 27,
  Date == "Apr 2019" ~ 28,
  Date == "May 2019" ~ 29,
  Date == "Jun 2019" ~ 30,
  Date == "Jul 2019" ~ 31,
  Date == "Aug 2019" ~ 32,
  Date == "Sep 2019" ~ 33,
  Date == "Oct 2019" ~ 34,
  Date == "Nov 2019" ~ 35,
  Date == "Dec 2019" ~ 36,
  Date == "Jan 2020" ~ 37,
  Date == "Feb 2020" ~ 38,
  Date == "Mar 2020" ~ 39,
  Date == "Apr 2020" ~ 40,
  Date == "May 2020" ~ 41,
  Date == "Jun 2020" ~ 42,
  TRUE ~ NA_real_
    )
)  

Histogram - 9 Bay Area Counties Monthly Electric & Gas Usage, 2017-2020

library(tidyverse)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(zoo)



pge_bayzips_date <- transform(pge_bayzips_date, Date = as.Date(Date, frac = 1))
 
class(pge_bayzips_date$Date)
## [1] "Date"
pge_bay_chart <-
  pge_bayzips_date %>% 
  ggplot() +
  geom_bar(
    aes(
      x = Date,
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "KBTU",
    title = "9 Bay Area Counties Monthly Electric & Gas Usage, 2017-2020",
    fill = "Energy Type"
  ) 

pge_bay_chart %>% 
  ggplotly() %>% 
  layout(
    xaxis = list(fixedrange = T),
    yaxis = list(fixedrange = T)
  ) %>% 
  config(displayModeBar = F) 
## Warning: Removed 1 rows containing missing values (position_stack).

When comparing March of 2019 with March of 2020, there was a notable increase in Residential Electricity usage, which can likely be attributed an influx of people quarantining due to COVID-19. This increase in Residential Electricity usage continued from March through June.

Interestingly, Residential Gas Usage decreased slightly when comparing March 2019 to March 2020. However, Residential Gas Usage increased significantly in April 2020 as compared to April 2019, which could be attributed, again, to the influx of people spending more time at home.

As expected, Commercial Electricity usage was lesser for the first six months of 2020 overall than it was for the first six months of 2019. This is an unsurprising finding, since Bay Area “stay-at-home” orders would likely decrease the need for Commercial Electricity. Conversely, Commercial Gas usage experienced a slight increase during March 2020 and April 2020 compared to the same months in 2019, but then experienced a stark decrease moving into the summer months

Creating Covid Comparison Data:

library(tidyverse)
library(dplyr)
library(sf)
library(tigris)
library(leaflet)

pge_map_zips <-
  pge_elec_gas %>% 
  filter(CUSTOMERCLASS %in% c("Gas- Residential", "Elec- Residential", "Gas- Commercial", "Elec- Commercial")) %>% 
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>% 
  group_by(ZIPCODE, YEAR, CUSTOMERCLASS, MONTH) %>%
  summarize(
    TOTALKBTU = sum(TOTALKBTU, na.rm = T)
  ) %>% 
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  )
## `summarise()` regrouping output by 'ZIPCODE', 'YEAR', 'CUSTOMERCLASS' (override with `.groups` argument)
library(zoo)
pge_map_zips$Date <- as.yearmon(paste(pge_map_zips$YEAR, pge_map_zips$MONTH), "%Y %m")

pge_map_zips$Date <- NULL  


pge_covid <-
  pge_map_zips %>% 
  filter(CUSTOMERCLASS == "Elec- Residential", YEAR %in% 2019:2020, MONTH %in% 3:6) %>%
  pivot_wider(
    names_from = YEAR, 
    values_from = TOTALKBTU,
    values_fill = 0
  ) %>% 
  rename(
    KBTU2019 = "2019",
    KBTU2020 = "2020"
  ) %>%
  mutate(
    KBTU_change = ((KBTU2020 - KBTU2019)/(KBTU2019)) * 100
 ) %>% 
 st_as_sf()

Map - Percentage Change in Residential Electricity Use During COVID-19

library(tidyverse)
library(sf)
library(tigris)
library(leaflet)


  res_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    pge_covid$KBTU_change
)

leaflet() %>% 
  addTiles() %>% 
  addPolygons(
    data = pge_covid,
    fillColor = ~res_pal(KBTU_change),
    color = "white",
    opacity = 0.5,
    fillOpacity = 0.5,
    weight = 1,
    label = ~paste0(
      round(KBTU_change), 
      "KBTU Percentage Change in",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = pge_covid,
    pal = res_pal,
    values = ~KBTU_change,
    title = "Percentage Change in Residential Electric Energy Use<br>During COVID-19"
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'

Aside from zipcodes belonging to Sonoma County, all of the zipcodes in the Bay Area experienced a positive percentage change in Residential Electricity Use during Covid-19. San Mateo, Santa Clara, Palo Alto, and Loma Mar did not experience a percentage change in Residential Electricity Use at all, though Stanford experiences a 8% change in Residential Electricity Use during COVID-19. A key assumption that is relevant in this analysis is the fact that these changes in Electricity occured during a state mandated lock-down that necessitated people remain home.